{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-1-6b0e511ae5c7>:12: DeprecationWarning: use options instead of chrome_options\n",
      "  driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "import pandas as pd\n",
    "from IPython.display import Image\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "# Configure the Chrome session that every later cell drives.\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')  # avoids the DevToolsActivePort-file-not-found startup error\n",
    "# Requested browser resolution. NOTE(review): Chrome documents the W,H form for this switch -- confirm the 'x' form is honored.\n",
    "opts.add_argument('window-size=1920x3000')\n",
    "opts.add_argument('--disable-gpu')  # Google's docs mention this flag as a workaround for a bug\n",
    "opts.add_argument('--hide-scrollbars')  # hide scrollbars, helps on some special pages\n",
    "\n",
    "# Selenium deprecated the `chrome_options=` keyword in favour of `options=`.\n",
    "driver = webdriver.Chrome(options=opts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the mp.weixin.qq.com landing page in the controlled browser.\n",
    "start_url = \"https://mp.weixin.qq.com\"\n",
    "driver.get(start_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Credentials are taken from the environment (falling back to an interactive prompt)\n",
    "# so that no account name or password is ever stored in the notebook file.\n",
    "payload = {\n",
    "    \"account\": os.environ.get(\"WECHAT_MP_ACCOUNT\") or input(\"WeChat MP account: \"),\n",
    "    \"password\": os.environ.get(\"WECHAT_MP_PASSWORD\") or getpass(\"WeChat MP password: \"),\n",
    "}\n",
    "\n",
    "# Click the link inside the `...__scan` login container (presumably switches from QR-code\n",
    "# login to the account/password form -- confirm), then empty both inputs of the login form.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"login__type__container login__type__container__scan\"]/a').click()\n",
    "login_form_xpath = '//form[@class=\"login_form\"]'\n",
    "for field_name in (\"account\", \"password\"):\n",
    "    driver.find_element(By.XPATH, f'{login_form_xpath}//input[@name=\"{field_name}\"]').clear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Type the credentials into the login form, then click the link in the login button panel.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "account_input = driver.find_element(By.XPATH, '//form[@class=\"login_form\"]//input[@name=\"account\"]')\n",
    "account_input.send_keys(payload['account'])\n",
    "password_input = driver.find_element(By.XPATH, '//form[@class=\"login_form\"]//input[@name=\"password\"]')\n",
    "password_input.send_keys(payload['password'])\n",
    "driver.find_element(By.XPATH, '//div[@class=\"login_btn_panel\"]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.find_element_by_xpath('//*[@id=\"app\"]/div[3]/div/div[2]/div[1]/div/img').get_attribute(\"src\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the unfold-menu button in the page header.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "element = driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-head\"]//a[@class=\"weui-desktop-btn__head-opr weui-desktop__unfold-menu-opr\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scroll the window down to the full height of the document body.\n",
    "scroll_to_bottom_js = \"window.scrollTo(0,document.body.scrollHeight)\"\n",
    "driver.execute_script(scroll_to_bottom_js)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the global sub-menu entry whose title is 图文素材.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-menu_global-wrapper\"]//ul[@class=\"weui-desktop-sub-menu\"]/li[@title=\"图文素材\"]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the icon inside the first `weui-desktop-card__inner` card.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-card__inner\"]/i').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In the add panel, click the list item whose title is 图文消息.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"preview_media_add_panel\"]//li[@title=\"图文消息\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-AA9029052A4519AC22ABF0CE9AD2BB67',\n",
       " 'CDwindow-184C2A2F107D79474BD65841E4C11F57']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the handles of all windows/tabs currently open in this browser session.\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-12-6c6d5ce6602d>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Move the driver's focus to the second window handle.\n",
    "# `switch_to.window` replaces the deprecated `switch_to_window` method.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the template-list entry with id `js_editor_insertlink`.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"media_list_box_inner\"]/ul[@class=\"tpl_list\"]/li[@id=\"js_editor_insertlink\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click a button addressed by an absolute XPath.\n",
    "# NOTE(review): the path is tied to the exact page layout and does not say which button this is -- confirm and prefer a class/id based locator.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div/div[6]/div[2]/div[1]/div/div[2]/div[2]/form[1]/div[3]/div/div/p/div/button').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empty the search input of the link dialog.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-link-dialog\"]//div[@class=\"weui-desktop-search weui-desktop-form__input_clear\"]//span[@class=\"weui-desktop-form__input-wrp\"]/input').clear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Type the search keyword into the link dialog's search input.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "keyword = \"广州日报\"\n",
    "search_input = driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-link-dialog\"]//div[@class=\"weui-desktop-search weui-desktop-form__input_clear\"]//span[@class=\"weui-desktop-form__input-wrp\"]/input')\n",
    "search_input.send_keys(keyword)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the search button that belongs to the link dialog's search box.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-link-dialog\"]//div[@class=\"weui-desktop-search weui-desktop-form__input_clear\"]//button[@class=\"weui-desktop-icon-btn weui-desktop-search__btn\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the first item of the list shown in the search panel.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-form__control-group\"]//div[@class=\"weui-desktop-search__panel\"]/ul/li[1]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>time</th>\n",
       "      <th>link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5月30日广州市新冠肺炎疫情情况</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>最新最全！广州276家“愿检尽检”检测服务点公布</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>搭乘广州跨市公交地铁，需要核酸证明吗？明确了</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>尚未接种疫苗老人注意！广东省疾控专家最新提醒</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>“马上来”“通宵也可以”......这个群聊记录，看哭了</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>刚刚，广东城际发布通知，今晚10点起实施</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>最新！到药店购买这些药品，需实名登记</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>非常危险！越南发现新冠病毒变异毒株混合体，能在空气中迅速传播！</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>5月31日广东省新冠肺炎疫情情况</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>佛山市5月30日新冠肺炎疫情情况</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>离穗最新通告公布！官方详解来了</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>速看！天河区282个核酸检测点公布</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>增城区发布疫情防控工作通告！</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>南沙区疫情防控指挥部发布最新提示！</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>最全！广东全省新冠病毒疫苗预约接种方式公布</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>出行注意！广州17条公交线路暂停营运！多条线路有变化 | 新闻早茶</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>刚刚，广州天河区发布开展大规模核酸检测工作通告！涉及这些区域</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>从化区发布最新通告！今日这两个小区人员需进行核酸检测</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>广州市疾控中心发布重要提醒！</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>深圳最新通报！</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>逾百万元大奖等你来！河套深港科技创新合作区深圳园区VI形象设计及地标标识物设计有奖征集中</td>\n",
       "      <td>2021-05-31</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>广州市新型冠状病毒肺炎疫情防控指挥部关于加强离穗车辆和人员管理的通告（第14号）</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>广州市教育局刚刚通知！</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>海珠区公布5月31日核酸检测采样点</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>越秀区18条街道社区核酸检测点公布</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>广州街坊爆料，“全过程不到十分钟”</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>刚刚通报，广州将扩大核酸检测范围！</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>番禺区发布疫情分级分类防控通告</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>周知！海珠区明后两天暂停新冠病毒疫苗接种</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>紧急扩散！收到这样的“疫苗信息”立即删除，近期高发</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>白云区多地启动全员检测核酸</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>核酸检测采样前请先做这件事</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           title        time  \\\n",
       "0                               5月30日广州市新冠肺炎疫情情况  2021-05-31   \n",
       "1                       最新最全！广州276家“愿检尽检”检测服务点公布  2021-05-31   \n",
       "2                         搭乘广州跨市公交地铁，需要核酸证明吗？明确了  2021-05-31   \n",
       "3                         尚未接种疫苗老人注意！广东省疾控专家最新提醒  2021-05-31   \n",
       "4                   “马上来”“通宵也可以”......这个群聊记录，看哭了  2021-05-31   \n",
       "5                           刚刚，广东城际发布通知，今晚10点起实施  2021-05-31   \n",
       "6                             最新！到药店购买这些药品，需实名登记  2021-05-31   \n",
       "7                非常危险！越南发现新冠病毒变异毒株混合体，能在空气中迅速传播！  2021-05-31   \n",
       "8                               5月31日广东省新冠肺炎疫情情况  2021-05-31   \n",
       "9                               佛山市5月30日新冠肺炎疫情情况  2021-05-31   \n",
       "10                               离穗最新通告公布！官方详解来了  2021-05-31   \n",
       "11                             速看！天河区282个核酸检测点公布  2021-05-31   \n",
       "12                                增城区发布疫情防控工作通告！  2021-05-31   \n",
       "13                             南沙区疫情防控指挥部发布最新提示！  2021-05-31   \n",
       "14                         最全！广东全省新冠病毒疫苗预约接种方式公布  2021-05-31   \n",
       "15             出行注意！广州17条公交线路暂停营运！多条线路有变化 | 新闻早茶  2021-05-31   \n",
       "16                刚刚，广州天河区发布开展大规模核酸检测工作通告！涉及这些区域  2021-05-31   \n",
       "17                    从化区发布最新通告！今日这两个小区人员需进行核酸检测  2021-05-31   \n",
       "18                                广州市疾控中心发布重要提醒！  2021-05-31   \n",
       "19                                       深圳最新通报！  2021-05-31   \n",
       "20  逾百万元大奖等你来！河套深港科技创新合作区深圳园区VI形象设计及地标标识物设计有奖征集中  2021-05-31   \n",
       "21      广州市新型冠状病毒肺炎疫情防控指挥部关于加强离穗车辆和人员管理的通告（第14号）  2021-05-30   \n",
       "22                                   广州市教育局刚刚通知！  2021-05-30   \n",
       "23                             海珠区公布5月31日核酸检测采样点  2021-05-30   \n",
       "24                             越秀区18条街道社区核酸检测点公布  2021-05-30   \n",
       "25                             广州街坊爆料，“全过程不到十分钟”  2021-05-30   \n",
       "26                             刚刚通报，广州将扩大核酸检测范围！  2021-05-30   \n",
       "27                               番禺区发布疫情分级分类防控通告  2021-05-30   \n",
       "28                          周知！海珠区明后两天暂停新冠病毒疫苗接种  2021-05-30   \n",
       "29                     紧急扩散！收到这样的“疫苗信息”立即删除，近期高发  2021-05-30   \n",
       "30                                 白云区多地启动全员检测核酸  2021-05-30   \n",
       "31                                 核酸检测采样前请先做这件事  2021-05-30   \n",
       "\n",
       "                                                 link  \n",
       "0   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "2   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "3   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "4   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "5   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "6   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "7   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "8   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "9   http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "10  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "11  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "12  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "13  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "14  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "15  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "16  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "17  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "18  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "19  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "20  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "21  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "22  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "23  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "24  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "25  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "26  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "27  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "28  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "29  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "30  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "31  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the article list currently shown in the link dialog: hrefs from the anchors,\n",
    "# titles and dates from the list wrapper's innerText.\n",
    "# `find_element(s)(By.XPATH, ...)` replaces the `find_element(s)_by_xpath` helpers removed in Selenium 4.3.\n",
    "list_wrapper = driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-media__list-wrp\"]')\n",
    "links = [\n",
    "    anchor.get_attribute(\"href\")\n",
    "    for anchor in driver.find_elements(By.XPATH, '//div[@class=\"weui-desktop-radio-group\"]//span[@class=\"weui-desktop-vm_default\"]//a')\n",
    "]\n",
    "info_str = list_wrapper.get_attribute('innerText')\n",
    "\n",
    "# Turn every tab/newline into a separator, collapse separator runs exactly as before\n",
    "# (5 -> 1, then 2 -> 1), split, and drop the first and last piece.\n",
    "# The separator is a NUL character instead of the letter 's', so a title that\n",
    "# contains an 's' is no longer cut into several pieces.\n",
    "SEP = '\\x00'\n",
    "info = (\n",
    "    info_str.replace('\\t', SEP).replace('\\n', SEP)\n",
    "    .replace(SEP * 5, SEP).replace(SEP * 2, SEP)\n",
    "    .split(SEP)[1:-1]\n",
    ")\n",
    "title = info[::2]   # even positions feed the title column\n",
    "time = info[1::2]   # odd positions feed the time column\n",
    "\n",
    "info_dict = {\n",
    "    \"title\": title,\n",
    "    \"time\": time,\n",
    "    \"link\": links,\n",
    "}\n",
    "data_all = pd.DataFrame(info_dict)\n",
    "data_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the first link found inside the pagination bar.\n",
    "# `find_element(By.XPATH, ...)` replaces the `find_element_by_xpath` helper removed in Selenium 4.3.\n",
    "driver.find_element(By.XPATH, '//div[@class=\"weui-desktop-pagination\"]//a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from time import sleep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pages_str = driver.find_element_by_xpath('//div[@class=\"weui-desktop-pagination\"]//label[2]').get_attribute('innerText')\n",
    "# pages = int(pages_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk through the result pages, collecting one list of titles, dates and links per page.\n",
    "# NOTE(review): the loop runs `pages - 2` times; the reason for the -2 is not visible here -- confirm.\n",
    "pages = 50\n",
    "\n",
    "title_all = []\n",
    "time_all = []\n",
    "links_all = []\n",
    "\n",
    "# NUL is used as the field separator instead of the letter 's', so titles that\n",
    "# contain an 's' are no longer split into several pieces.\n",
    "SEP = '\\x00'\n",
    "list_xpath = '//div[@class=\"weui-desktop-media__list-wrp\"]'\n",
    "links_xpath = '//div[@class=\"weui-desktop-radio-group\"]//span[@class=\"weui-desktop-vm_default\"]//a'\n",
    "# NOTE(review): layout-dependent XPath of the link clicked to advance to the next page.\n",
    "next_page_xpath = '//*[@id=\"vue_app\"]/div[2]/div[1]/div/div[2]/div[2]/form[1]/div[4]/div/div/div[3]/span[1]/a[2]'\n",
    "\n",
    "for _ in range(pages - 2):\n",
    "    # `find_element(s)(By.XPATH, ...)` replaces the `find_element(s)_by_xpath` helpers removed in Selenium 4.3.\n",
    "    list_wrapper = driver.find_element(By.XPATH, list_xpath)\n",
    "    links = [anchor.get_attribute(\"href\") for anchor in driver.find_elements(By.XPATH, links_xpath)]\n",
    "    info_str = list_wrapper.get_attribute('innerText')\n",
    "\n",
    "    # Same collapse steps as before (5 -> 1, then 2 -> 1); first and last piece are dropped.\n",
    "    info = (\n",
    "        info_str.replace('\\t', SEP).replace('\\n', SEP)\n",
    "        .replace(SEP * 5, SEP).replace(SEP * 2, SEP)\n",
    "        .split(SEP)[1:-1]\n",
    "    )\n",
    "\n",
    "    title = info[::2]\n",
    "    time = info[1::2]\n",
    "    title_all.append(title)\n",
    "    time_all.append(time)\n",
    "    links_all.append(links)\n",
    "\n",
    "    driver.find_element(By.XPATH, next_page_xpath).click()\n",
    "    sleep(5)  # fixed pause before reading the next page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten the per-page lists into one flat list per column.\n",
    "new_title = [entry for page in title_all for entry in page]\n",
    "new_time = [entry for page in time_all for entry in page]\n",
    "new_links = [entry for page in links_all for entry in page]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every '查看文章' entry from the title list, keeping the same list object.\n",
    "new_title[:] = [entry for entry in new_title if entry != '查看文章']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every empty string from the time list, keeping the same list object.\n",
    "new_time[:] = [entry for entry in new_time if entry != '']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1120"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of links collected across all scraped pages.\n",
    "len(new_links)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1120 1120 1120\n"
     ]
    }
   ],
   "source": [
    "# Print the three column lengths side by side; they must match to build the table.\n",
    "print(*(len(column) for column in (new_title, new_time, new_links)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>time</th>\n",
       "      <th>link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5月29日广州市新冠肺炎疫情情况</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>​广州市文明办发布《致广州市民的防疫倡议书》</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5月30日广东省新冠肺炎疫情情况</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>广州海珠区今日起开展全区全员核酸检测</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>广州越秀区今日起开展全区全员核酸检测</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1115</th>\n",
       "      <td>肯德基就餐喝到消毒水导致急性糜烂性胃炎，索赔还要求签保密协议？</td>\n",
       "      <td>2021-04-11</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1116</th>\n",
       "      <td>结不起婚？广州等15地下手了</td>\n",
       "      <td>2021-04-11</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1117</th>\n",
       "      <td>是真的！猪真的飞起来了！</td>\n",
       "      <td>2021-04-11</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1118</th>\n",
       "      <td>广东这位导师的“卑微”式招生火了！“不喜欢，我可以改”</td>\n",
       "      <td>2021-04-11</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1119</th>\n",
       "      <td>男子疑在家“孤独死”，近十年后才被人发现</td>\n",
       "      <td>2021-04-11</td>\n",
       "      <td>http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1120 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                title        time  \\\n",
       "0                    5月29日广州市新冠肺炎疫情情况  2021-05-30   \n",
       "1              ​广州市文明办发布《致广州市民的防疫倡议书》  2021-05-30   \n",
       "2                    5月30日广东省新冠肺炎疫情情况  2021-05-30   \n",
       "3                  广州海珠区今日起开展全区全员核酸检测  2021-05-30   \n",
       "4                  广州越秀区今日起开展全区全员核酸检测  2021-05-30   \n",
       "...                               ...         ...   \n",
       "1115  肯德基就餐喝到消毒水导致急性糜烂性胃炎，索赔还要求签保密协议？  2021-04-11   \n",
       "1116                   结不起婚？广州等15地下手了  2021-04-11   \n",
       "1117                     是真的！猪真的飞起来了！  2021-04-11   \n",
       "1118      广东这位导师的“卑微”式招生火了！“不喜欢，我可以改”  2021-04-11   \n",
       "1119             男子疑在家“孤独死”，近十年后才被人发现  2021-04-11   \n",
       "\n",
       "                                                   link  \n",
       "0     http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1     http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "2     http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "3     http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "4     http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "...                                                 ...  \n",
       "1115  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1116  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1117  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1118  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "1119  http://mp.weixin.qq.com/s?__biz=MjM5MjA0MDk2MA...  \n",
       "\n",
       "[1120 rows x 3 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assemble the flattened columns into the final table and display it.\n",
    "info_dict = dict(title=new_title, time=new_time, link=new_links)\n",
    "data_zong = pd.DataFrame(info_dict)\n",
    "data_zong"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
